
## assumes that the working directory is the top dir of the archive

## Load the mention-level data.
## All paths in this script are relative to the top directory of the archive,
## so the working directory is left untouched here (changing it to an absolute
## directory would make the relative read below resolve somewhere else).
dat <- read.csv("text/rawdata/WordSearchResults/mentions.csv")


## size and first rows of the data
dim(dat)

head(dat)

## number of mentions per country name, in increasing order
sort(table(dat$name))

## mentions per year for a few example countries
## (each call is written out so the default axis label names the country)
plot(table(dat$year[dat$name=="Rwanda"]))
plot(table(dat$year[dat$name=="Afghanistan"]))
plot(table(dat$year[dat$name=="United States of America"]))
plot(table(dat$year[dat$name=="Italy"]))
plot(table(dat$year[dat$name=="France"]))

## The overall number of mentions is increasing, so I need to standardize
plot(table(dat$year))


## make a date variable
## paste() is vectorised, so the "year-month-day" strings are built for all
## rows in one call instead of one row at a time.
mydate <- paste(dat$year,"-",dat$month,"-",dat$day,sep="")
## same year and month, with the day fixed at the 15th
mymonth <- paste(dat$year,"-",dat$month,"-15",sep="")
mydate[1:100]
dat$date <- as.Date(mydate)
dat$date2 <- as.Date(mymonth)
## date as a number of days (the numeric value underlying the Date)
dat$datenum <- as.numeric(dat$date)

## make a daily percentage
## Share of each day's mentions that refer to one country (here Rwanda),
## computed only over the dates that appear in dat$date.
myname <- "Rwanda"
## tmp starts as the count of all mentions per date; denom keeps that copy
tmp <- table(dat$date)
denom <- tmp
## counts per date for the chosen country only
tmp2 <- table(dat$date[dat$name==myname])
## zero out the totals, then fill in the country counts on matching dates
tmp[tmp>0] <- 0
tmp[names(tmp2)] <- tmp2

## empty plot (type="n") that only sets up the axes; y is fixed to 0..1
plot(as.Date(names(table(dat$date2))),
     table(dat$date2), 
     type="n",ylim=c(0,1))
## shares on the dates where the country is mentioned at least once
points(as.Date(names(tmp[tmp>0])),
     (tmp[tmp>0]/denom[tmp>0]), 
     col="red", pch=".")
## shares on every date in the table (zeros included)
points(as.Date(names(tmp)),
     (tmp/denom), 
     col="red", pch=".")
## normal-kernel smoother over the dates with at least one mention
lines(ksmooth(x=as.Date(names(tmp[tmp>0])),y=(tmp[tmp>0]/denom[tmp>0]),
       "normal",bandwidth=100))


## 
## print the per-date totals built above
denom

## I need to make a denominator that is all dates, not just all dates that
## have press releases

## every day number between the first and the last date in denom
## (Dates are turned into their numeric day values by the `:` operator)
everydaynums <- as.Date(names(denom[1])):as.Date(names(denom[length(denom)]))
## one zero per day, named by day number
everyday <- rep(0,length(everydaynums))
names(everyday) <- everydaynums
## compare the number of calendar days with the number of dates that have data
length(everyday)
length(denom)

daycounts <- denom
## Get the daynumbers as the names
names(daycounts) <- as.numeric(as.Date(names(denom)))
daycounts

## fill in the nonzero daycounts
## (character indexing matches the day-number names of both vectors)
everyday[names(daycounts)] <- daycounts
everyday

## Make a function that adds the zeros

## Expand a per-date count table to one entry per calendar day.
##
## x      table (or named vector) of counts whose names are dates written
##        as "YYYY-MM-DD".
## dates  Date vector that fixes the first and last day of the output;
##        defaults to dat$date, so existing calls timegaps(x) are unchanged.
##
## Returns a numeric vector with one element for every day between the
## earliest and latest value of `dates`, named by day number
## (as.numeric() of the Date).  Days absent from x are 0.  An empty x
## (e.g. a country that is never mentioned) gives an all-zero vector.
timegaps <- function(x, dates = dat$date){
  ## first and last day number covered by the data
  dayspan <- range(as.numeric(dates), na.rm = TRUE)
  everydaynums <- seq(dayspan[1], dayspan[2])
  everyday <- rep(0, length(everydaynums))
  names(everyday) <- everydaynums
  ## nothing to fill in: return the zero-filled calendar as is
  if (length(x) == 0) {
    return(everyday)
  }
  ## re-key the counts by day number so they line up with `everyday`
  daykeys <- as.character(as.numeric(as.Date(names(x))))
  ## fill in the nonzero daycounts
  everyday[daykeys] <- as.vector(x)
  everyday
}

## quick check of timegaps() on the Rwanda mentions
timegaps(table(dat$date[dat$name=="Rwanda"]))
  

## This plots the total counts by day WITH all the zeros
## empty plot (type="n") that only sets up the axes
plot(as.Date(names(table(dat$date2))),
     table(dat$date2), 
     type="n",ylim=c(0,.25))
## all mentions per calendar day, and Rwanda mentions per calendar day
## (note: the variable `all` shadows the name of base::all from here on)
all <- timegaps(table(dat$date))
rwanda <- timegaps(table(dat$date[dat$name=="Rwanda"]))
## daily share; 0/0 on days without any mention gives NaN, which is set to 0
div <- rwanda/all
div[is.nan(div)] <- 0
## x values are day numbers taken from the names
points(x=as.numeric(names(all)),y=div,pch=".")
lines(ksmooth(x=as.numeric(names(div)),y=div,
       "normal",bandwidth=10))


## a plot for the USA

## make a canned plotting function
## Plot the daily share of mentions that refer to one country.
##
## countryname  value of dat$name to plot; also used as the plot title
## ylim         y-axis limits
## bandwidth    bandwidth handed to ksmooth() for the smoothed line
##
## Uses the global data frame `dat` and the helper timegaps().
plot.mentions <- function(countryname,ylim=c(0,.25), bandwidth=10){
  ## empty frame (type="n"); only the axes and title are drawn here
  plot(as.Date(names(table(dat$date2))),
       table(dat$date2),
       type="n",ylim=ylim,main=countryname)
  ## counts per calendar day: every mention, and this country's mentions
  total.by.day <- timegaps(table(dat$date))
  country.by.day <- timegaps(table(dat$date[dat$name==countryname]))
  ## 0/0 on days without any mention is NaN; treat those days as share 0
  ratio <- country.by.day/total.by.day
  share <- replace(ratio, is.nan(ratio), 0)
  points(x=as.numeric(names(total.by.day)), y=share, pch=".")
  smoothed <- ksmooth(x=as.numeric(names(share)), y=share,
                      "normal", bandwidth=bandwidth)
  lines(smoothed)
}

#plot.mentions("Japan", bandwidth=200)
#plot.mentions("Afghanistan", bandwidth=50)
#plot.mentions("Iraq", bandwidth=50)
#plot.mentions("United States of America", bandwidth=50)



## This is actually a slightly different plot that doesn't try to
## add the zeros in

## mentions per day
plot(as.Date(names(table(dat$date2))),
     table(dat$date2), 
     type="n",ylim=c(0,700))

## make a daily percentage
## `everyday` is named by day number, not by date string.  The country counts
## are therefore re-keyed to day numbers before they are merged, and the
## names are converted back to Dates for plotting.
myname <- "Rwanda"
tmp <- everyday
denom <- tmp
tmp2 <- table(dat$date[dat$name==myname])
tmp[tmp>0] <- 0
tmp[as.character(as.numeric(as.Date(names(tmp2))))] <- tmp2
## calendar date of every element of tmp / denom
tmpdates <- as.Date(as.numeric(names(tmp)), origin="1970-01-01")
## days on which the country is mentioned at least once
mentioned <- tmp>0

plot(as.Date(names(table(dat$date2))),
     table(dat$date2), 
     type="n",ylim=c(0,1))
## shares on the days with at least one mention of the country
points(tmpdates[mentioned],
     (tmp[mentioned]/denom[mentioned]), 
     col="red", pch=".")
## shares on every calendar day (0/0 on empty days is NaN and is not drawn)
points(tmpdates,
     (tmp/denom), 
     col="red", pch=".")
lines(ksmooth(x=as.numeric(tmpdates[mentioned]),y=(tmp[mentioned]/denom[mentioned]),
       "normal",bandwidth=100))


###################################################################
## Now, look at how mentions respond to ratification

##
## Article 22 ratification dates; the file has no header row, so the columns
## are V1, V2, ...  (forward slashes keep the path usable on every platform)
art22 <- read.csv("text/rawdata/csv files with search names/Art22ratdates.csv", header=FALSE, as.is=TRUE)

head(art22)
ratdat <- art22

## First item of business is to turn the dates in to a date column
mos <- c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec")
names(mos) <- c("01","02","03","04","05","06","07","08","09","10","11","12")


## V2 holds the date as day-Mon-yy; split every entry into its three pieces
dateparts <- strsplit(as.character(ratdat$V2),"-")
myday <- vapply(dateparts, function(p) p[1], character(1))
## month abbreviation -> two-digit month number
mymonth <- names(mos)[match(vapply(dateparts, function(p) p[2], character(1)), mos)]
myyr <- vapply(dateparts, function(p) p[3], character(1))
## two-digit years above 50 are read as 19xx, all others as 20xx
myyear <- paste(ifelse(as.numeric(myyr)>50,"19","20"),myyr,sep="")
dateholder <- paste(myyear,"-",mymonth,"-",myday,sep="")
## old data format
#dateholder <- rep(NA,nrow(art22))
#for(i in 1:nrow(art22)){
#  mydate <- as.character(art22$V2[i])
#  mydate <- strsplit(mydate," ")[[1]]
#  mymonth <- names(mos)[mos %in% mydate[2]]
#  dateholder[i] <- (paste(mydate[3],"-",mymonth,"-",mydate[1],sep=""))
#}

art22$date <- as.Date(dateholder)
art22$datenum <- as.numeric(as.Date(dateholder))
 
head(art22) 

## Now, I can pull out all mentions around the ratification date

## work with the first ratification in art22
myrow <- art22[1,]
myrow$date
countryname <- as.character(myrow[1])
all <- timegaps(table(dat$date))
cname <- timegaps(table(dat$date[dat$name==countryname]))
## daily share of mentions for this country; 0/0 (NaN) is set to 0
div <- cname/all
div[is.nan(div)] <- 0

## ratification date as a day number
ratday <- as.numeric(myrow$date)
## the year from ratification onward, then the window of 365 days either side
div[as.character(ratday:(ratday+365))]
daterange <- div[as.character((ratday-365):(ratday+365))]

plot(x = as.numeric(names(daterange)),y=daterange,pch="^")
abline(v = ratday)

## is that a mention of ratification???
dat$datenum <- as.numeric(dat$date)
ratday
dat[dat$name=="Algeria",]

## It's this one "1989-0677IP.txt"

dat$veryclose[dat$briefname=="1989-0677IP.txt" & dat$name=="Algeria"]

## Look at the words before and after
## (day numbers are compared as numbers; the ratification day is in both sets)
# BEFORE
text1 <- dat$veryclose[dat$name=="Algeria" & dat$datenum %in% ((ratday-365):ratday)]
# AFTER
text2 <- dat$veryclose[dat$name=="Algeria" & dat$datenum %in% (ratday:(ratday+365))]

## join all "before" snippets into one string and count its words;
## paste(collapse=) also copes with an empty set of snippets
text1 <- as.character(text1)
btext <- paste(text1, collapse=" ")
btext
sort(table(strsplit(btext," ")[[1]]))

## same for the "after" snippets
text2 <- as.character(text2)
atext <- paste(text2, collapse=" ")
atext
sort(table(strsplit(atext," ")[[1]]))



######################################
## Things to do
##
## 1. Look at frequency of mentions
## 2. Look at the word "visit" in mentions to see if it goes up
## 3. Look at the sentiment of the words
## 4. See if there are any words following ratification that
##    increase in frequency significantly.  What are they?
##
## 5. Identify all specific praise for ratifying the specific treaties.


## Read in the sentiment analysis dictionary
## (forward slashes keep the path usable on every platform)

sent <- readLines("text/rawdata/sentiment analysis dictionary/hltemnlp05clues/subjclueslen1-HLTEMNLP05.txt")
sent

## one row per dictionary line; `num` is the line number
sentdat <- data.frame(row(as.matrix(sent)))
names(sentdat) <- "num"
sentdat$type <- NA
sentdat$len <- NA
sentdat$word1 <- NA
sentdat$pos1 <- NA
sentdat$stemmed1 <- NA
sentdat$priorpolarity <- NA
head(sentdat)

## Each line is a space-separated list of key=value fields.  Every field is
## written to the column named by its key; a key that is not a column yet
## creates a new column.
for(i in seq_along(sent)){
  if(i %% 100 ==0){print(i)}
  fields <- strsplit(sent[i]," ")[[1]]
  for(field in fields){
    keyval <- strsplit(field,"=")[[1]]
    sentdat[i,keyval[1]] <- keyval[2]
  }
}
  

## look at columns created by irregular lines, and at two specific rows
sentdat$mpqapolarity
sentdat$polarity
sentdat[7078,]
sentdat[3749,]

## Take the word "will" out of the list because I don't like it
sentdat[sentdat$word1=="will",]
## a logical filter is used instead of -which(): with no match, -which()
## would select zero rows and empty the whole dictionary
sentdat <- sentdat[!(sentdat$word1 %in% "will"),]


dim(dat)
## check how often the briefings used these words

## For every mention, count how many of the space-separated tokens of
## dat$veryclose appear in the sentiment dictionary.  The text is split once
## for all rows and counted with vapply(), which also works for zero rows.
dictwords <- sentdat$word1
mention.tokens <- strsplit(as.character(dat$veryclose)," ")
has.sent.word <- vapply(mention.tokens,
                        function(tok) sum(tok %in% dictwords),
                        integer(1))

sum(has.sent.word[1:1000])
has.sent.word[1:1000]
table(has.sent.word)

#> table(has.sent.word)
#has.sent.word
#     0      1      2      3      4      5      6      7 
#140502  71143  28866   8161   1603    207     18      3


## What proportion of mentions have words on the list?
table(has.sent.word)/nrow(dat)

#> table(has.sent.word)/nrow(dat)
#has.sent.word
#           0            1            2            3            4            5 
#5.608795e-01 2.840006e-01 1.152322e-01 3.257845e-02 6.399125e-03 8.263374e-04 
#           6            7 
#7.185543e-05 1.197590e-05 


############################
## What are the actual words that are being used?


## one counter per dictionary entry, named by the dictionary word
wordcounts <- rep(0,length(sentdat$word1))
names(wordcounts) <- sentdat$word1
wordcounts

## text of the mentions that contain at least one dictionary word
vctext <- as.character(dat$veryclose[which(has.sent.word>0)])
## counter goes to 110,000
for(i in 1:length(vctext)){
  if(i %% 1000 == 0){print(i)}
  ## tokens of this mention that are dictionary words
  wrds <- strsplit(vctext[i]," ")[[1]][(strsplit(vctext[i]," ")[[1]] %in% sentdat$word1)]
  ## NOTE(review): indexed assignment adds 1 per word per mention even if the
  ## word occurs several times in the mention, and a name that occurs more than
  ## once in the dictionary only updates its first entry -- confirm intended
  wordcounts[wrds] <- wordcounts[wrds] + 1
}

## this vector should have all the wordcounts now
wordcounts
wordcounts[order(wordcounts)]

## prior polarity of every dictionary entry, named by word
pp <- sentdat$priorpolarity
names(pp) <- sentdat$word1

## polarity next to count, ordered by count
cbind(pp[names(wordcounts)[order(wordcounts)]], wordcounts[order(wordcounts)])

## hex colour (without a leading "#") per word: blue for positive, red for
## negative, grey for neutral / both / weakneg
colorz <- pp[names(wordcounts)]
colorz[colorz=="positive"] <- "0000FF"
colorz[colorz=="negative"] <- "FF0000"
colorz[colorz=="neutral"] <- "888888"
colorz[colorz=="both"] <- "888888"
colorz[colorz=="weakneg"] <- "888888"
table(colorz)

## Output it to wordle format
  ## one "word:count:colour" entry per dictionary word, built in a single
  ## vectorised paste() and kept as a one-column character matrix
  out <- matrix(paste(names(wordcounts),wordcounts,colorz,sep=":"),ncol=1)

  ## make a file with the word frequencies for wordle

  filename <- "text/wordle_6dec2011.txt"
  ## plain text, one entry per line, no row/column names and no quotes
  write.table(out,filename,row.names=FALSE,quote=FALSE,col.names=FALSE)


  ## This file can be pasted into http://www.wordle.net/advanced to make a word cloud


## Now, I want to capture the sentiment of each mention
## sentiment[i]      space-separated prior polarities found for mention i
## sentimentwords[i] space-separated dictionary words found in mention i
## Both stay NA for mentions without any dictionary word.
sentiment <- rep(NA,nrow(dat))
sentimentwords <- rep(NA,nrow(dat))

## counter goes to 250503
for(i in 1:nrow(dat)){
  if(i %%10000 == 0){print(i)}
  #if(sum(strsplit(as.character(dat$veryclose[i])," ")[[1]] %in% sentdat$word1)>0){
  if(has.sent.word[i]>0){
    ## tokens of the mention, and those tokens that are dictionary words
    wordz <- strsplit(as.character(dat$veryclose[i])," ")[[1]] 
    swordz <- wordz[wordz %in% sentdat$word1]
    ## NOTE(review): this selects dictionary rows, not tokens -- a word repeated
    ## in the mention contributes once, and a word with several dictionary rows
    ## contributes one polarity per row; confirm this is the intended count
    sents <- sentdat$priorpolarity[sentdat$word1 %in% swordz]
    sents <- paste(sents, collapse=" ")
    swordz <- paste(swordz, collapse=" ")
    sentiment[i] <- sents
    sentimentwords[i] <- swordz
  }
}    

## number of distinct polarity strings
length(table(sentiment))

## now, we need to assign scores for the different levels
table(sentdat$priorpolarity)

## proportion positive
# both = 0, negative = 0, neutral = 0, positive = 1, weakneg = 0
## proportion negative
# both = 0, negative = 1, neutral = 0, positive = 0, weakneg = 0 (because there's only 1)

## haspos / hasneg: 1 if the mention has at least one positive / negative entry
## ppos / pneg: positive / negative entries divided by the number of
##              space-separated tokens in the mention text
haspos <- rep(0,length(sentiment))
hasneg <- rep(0,length(sentiment))
ppos <- rep(NA,length(sentiment))
pneg <- rep(NA,length(sentiment))
for(i in 1:length(sentiment)){
  if(i %% 1000==0){print(i)}
  if(is.na(sentiment[i])==F){ 
    ## counts of each polarity label for this mention
    mtab <- table(strsplit(sentiment[i]," ")[[1]])
    ## NA when the label "positive" does not occur in the table
    poscount <- mtab["positive"]*1
    if(is.na(poscount)==F){
      haspos[i] <- 1
    }
    ppos[i] <- poscount/length(strsplit(as.character(dat$veryclose[i])," ")[[1]])
    ## NA when the label "negative" does not occur in the table
    negcount <- mtab["negative"]*1
    if(is.na(negcount)==F){
      hasneg[i] <- 1
    }
    pneg[i] <- negcount/length(strsplit(as.character(dat$veryclose[i])," ")[[1]])
  }
}

## 
ppos
## keep the version with NAs, then treat missing proportions as 0
ppos1 <- ppos
ppos[is.na(ppos)] <- 0    
ppos
hist(ppos)



## checkpoint: write the whole workspace to disk
dir.create("text/madedata")
save.image("text/madedata/mention workspace1.RData")
## Keep skipping down

## keep the version with NAs, then treat missing proportions as 0
pneg1 <- pneg
pneg[is.na(pneg)] <- 0
pneg
hist(pneg)

## how often mentions carry positive / negative words, and how the two
## indicators relate to each other
table(haspos)
table(hasneg)
cor(haspos,hasneg)

summary(lm(haspos~hasneg))

## NOW, I need to match it up to the ratification dates and 
## see if there is a bump in positivity following ratification!


##############################3
## ARt 22
nrow(art22)

## For every ratification in art22, compare the mean of ppos (proportion of
## positive words per mention) over the country's mentions in the 365 days up
## to the ratification date with the mean over the 365 days from it.
## Columns of mention.holder: country, mean before, mean after, after - before.
## NOTE(review): the ratification day itself falls in both windows.
mention.holder <- as.data.frame(matrix(NA,nrow(art22),4))
for(rr in seq_len(nrow(art22))){
  print(rr)
  myrow <- art22[rr,]
  countryname <- as.character(myrow[1])
  ## ratification date as a day number
  ratday <- as.numeric(myrow$date)
  ## rows of dat that mention this country
  is.country <- dat$name==countryname
  ## Look at the words before and after
  # BEFORE
  before <- ppos[is.country & dat$datenum %in% ((ratday-365):ratday)]
  # AFTER
  after <- ppos[is.country & dat$datenum %in% (ratday:(ratday+365))]
  mention.holder[rr,1] <- countryname
  mention.holder[rr,2] <- mean(before)
  mention.holder[rr,3] <- mean(after)
  mention.holder[rr,4] <- mean(after) - mean(before)
}

hist(mention.holder[,4])
t.test(mention.holder[,4])

mention.holder.art22 <- mention.holder


#######################
## Now for the CAT

## CAT ratification dates; no header row, so the columns are V1, V2, ...
## (forward slashes keep the path usable on every platform)
ratdat <- read.csv("text/rawdata/csv files with search names/CATratdates.csv", header=FALSE, as.is=TRUE)

head(ratdat)

## First item of business is to turn the dates in to a date column
mos <- c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec")
names(mos) <- c("01","02","03","04","05","06","07","08","09","10","11","12")
## V2 holds the date as day-Mon-yy; split every entry into its three pieces
dateparts <- strsplit(as.character(ratdat$V2),"-")
myday <- vapply(dateparts, function(p) p[1], character(1))
## month abbreviation -> two-digit month number
mymonth <- names(mos)[match(vapply(dateparts, function(p) p[2], character(1)), mos)]
myyr <- vapply(dateparts, function(p) p[3], character(1))
## two-digit years above 50 are read as 19xx, all others as 20xx
myyear <- paste(ifelse(as.numeric(myyr)>50,"19","20"),myyr,sep="")
dateholder <- paste(myyear,"-",mymonth,"-",myday,sep="")
ratdat$date <- as.Date(dateholder)
ratdat$datenum <- as.numeric(as.Date(dateholder))
 
head(ratdat) 

## Mean of ppos (proportion of positive words per mention) over the country's
## mentions in the 365 days up to ratification versus the 365 days from it.
## Columns of mention.holder: country, mean before, mean after, after - before.
## NOTE(review): the ratification day itself falls in both windows.
mention.holder <- as.data.frame(matrix(NA,nrow(ratdat),4))
for(rr in seq_len(nrow(ratdat))){
  print(rr)
  myrow <- ratdat[rr,]
  countryname <- as.character(myrow[1])
  ## ratification date as a day number
  ratday <- as.numeric(myrow$date)
  ## rows of dat that mention this country
  is.country <- dat$name==countryname
  # BEFORE
  before <- ppos[is.country & dat$datenum %in% ((ratday-365):ratday)]
  # AFTER
  after <- ppos[is.country & dat$datenum %in% (ratday:(ratday+365))]
  mention.holder[rr,1] <- countryname
  mention.holder[rr,2] <- mean(before)
  mention.holder[rr,3] <- mean(after)
  mention.holder[rr,4] <- mean(after) - mean(before)
}

hist(mention.holder[,4], breaks=30)
t.test(mention.holder[,4])


## ordered by the "before" mean, then by the change
mention.holder[order(mention.holder[,2]),]

mention.holder[order(mention.holder[,4]),c(1,4)]

mention.holder.cat <- mention.holder


#######################
## Now for the ICCPR

## ICCPR ratification dates; no header row, so the columns are V1, V2, ...
## Read from the archive (relative path) instead of a machine-specific
## absolute path.
ratdat <- read.csv("text/rawdata/csv files with search names/ICCPRratdates.csv", header=FALSE, as.is=TRUE)

head(ratdat)

## First item of business is to turn the dates in to a date column
mos <- c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec")
names(mos) <- c("01","02","03","04","05","06","07","08","09","10","11","12")
## V2 holds the date as day-Mon-yy; split every entry into its three pieces
dateparts <- strsplit(as.character(ratdat$V2),"-")
myday <- vapply(dateparts, function(p) p[1], character(1))
## month abbreviation -> two-digit month number
mymonth <- names(mos)[match(vapply(dateparts, function(p) p[2], character(1)), mos)]
myyr <- vapply(dateparts, function(p) p[3], character(1))
## two-digit years above 50 are read as 19xx, all others as 20xx
myyear <- paste(ifelse(as.numeric(myyr)>50,"19","20"),myyr,sep="")
dateholder <- paste(myyear,"-",mymonth,"-",myday,sep="")
ratdat$date <- as.Date(dateholder)
ratdat$datenum <- as.numeric(as.Date(dateholder))
 
head(ratdat) 

## Mean of ppos (proportion of positive words per mention) over the country's
## mentions in the 365 days up to ratification versus the 365 days from it.
## Columns of mention.holder: country, mean before, mean after, after - before.
## NOTE(review): the ratification day itself falls in both windows.
mention.holder <- as.data.frame(matrix(NA,nrow(ratdat),4))
for(rr in seq_len(nrow(ratdat))){
  print(rr)
  myrow <- ratdat[rr,]
  countryname <- as.character(myrow[1])
  ## ratification date as a day number
  ratday <- as.numeric(myrow$date)
  ## rows of dat that mention this country
  is.country <- dat$name==countryname
  # BEFORE
  before <- ppos[is.country & dat$datenum %in% ((ratday-365):ratday)]
  # AFTER
  after <- ppos[is.country & dat$datenum %in% (ratday:(ratday+365))]
  mention.holder[rr,1] <- countryname
  mention.holder[rr,2] <- mean(before)
  mention.holder[rr,3] <- mean(after)
  mention.holder[rr,4] <- mean(after) - mean(before)
}

hist(mention.holder[,4], breaks=30)
t.test(mention.holder[,4])


## ordered by the "before" mean, then by the change
mention.holder[order(mention.holder[,2]),]

mention.holder[order(mention.holder[,4]),c(1,4)]

mention.holder.iccpr <- mention.holder


#######################
## Now for the OP1

## OP1 ratification dates; no header row, so the columns are V1, V2, ...
## Read with a relative path instead of a machine-specific absolute path.
## NOTE(review): assumes Op1ratdates.csv is archived next to the other
## ratification files -- confirm.
ratdat <- read.csv("text/rawdata/csv files with search names/Op1ratdates.csv", header=FALSE, as.is=TRUE)

head(ratdat)

## First item of business is to turn the dates in to a date column
mos <- c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec")
names(mos) <- c("01","02","03","04","05","06","07","08","09","10","11","12")


## V2 holds the date as day-Mon-yy; split every entry into its three pieces
dateparts <- strsplit(as.character(ratdat$V2),"-")
myday <- vapply(dateparts, function(p) p[1], character(1))
## month abbreviation -> two-digit month number
mymonth <- names(mos)[match(vapply(dateparts, function(p) p[2], character(1)), mos)]
myyr <- vapply(dateparts, function(p) p[3], character(1))
## two-digit years above 50 are read as 19xx, all others as 20xx
myyear <- paste(ifelse(as.numeric(myyr)>50,"19","20"),myyr,sep="")
dateholder <- paste(myyear,"-",mymonth,"-",myday,sep="")

ratdat$date <- as.Date(dateholder)
ratdat$datenum <- as.numeric(as.Date(dateholder))
 
head(ratdat) 

## Mean of ppos (proportion of positive words per mention) over the country's
## mentions in the 365 days up to ratification versus the 365 days from it.
## Columns of mention.holder: country, mean before, mean after, after - before.
## NOTE(review): the ratification day itself falls in both windows.
mention.holder <- as.data.frame(matrix(NA,nrow(ratdat),4))
for(rr in seq_len(nrow(ratdat))){
  print(rr)
  myrow <- ratdat[rr,]
  countryname <- as.character(myrow[1])
  ## ratification date as a day number
  ratday <- as.numeric(myrow$date)
  ## rows of dat that mention this country
  is.country <- dat$name==countryname
  # BEFORE
  before <- ppos[is.country & dat$datenum %in% ((ratday-365):ratday)]
  # AFTER
  after <- ppos[is.country & dat$datenum %in% (ratday:(ratday+365))]
  mention.holder[rr,1] <- countryname
  mention.holder[rr,2] <- mean(before)
  mention.holder[rr,3] <- mean(after)
  mention.holder[rr,4] <- mean(after) - mean(before)
}

hist(mention.holder[,4], breaks=30)
t.test(mention.holder[,4])


## ordered by the "before" mean, then by the change
mention.holder[order(mention.holder[,2]),]

mention.holder[order(mention.holder[,4]),c(1,4)]

mention.holder.op1 <- mention.holder


## checkpoint: write the whole workspace to disk
save.image("text/madedata/mention workspace2.RData")

# One-sample t-tests of the per-ratification change in the proportion of
# positive words (after minus before), one test per treaty.

t.iccpr <- t.test(mention.holder.iccpr[,4])
t.op1 <- t.test(mention.holder.op1[,4])
t.cat <- t.test(mention.holder.cat[,4])
t.art22 <- t.test(mention.holder.art22[,4])
## Start the plot
dir.create("text/results")

pdf("text/results/positiveWords.pdf")
## empty frame; one horizontal row per treaty (y = 1..4)
plot(0,0,type="n",ylim=c(.5,4.5),xlim=c(-.05,.05), axes=FALSE,
     xlab="Average Change in Proportion of Positive Words",
     ylab="", main="Positive Words Don't Increase\nAfter Ratification")
axis(1)
axis(2,at=c(1,2,3,4),labels=c("Art. 22","CAT","Opt. 1","ICCPR"), las=2)
abline(v=0,lty=2,col="gray50")
## confidence intervals, drawn from the top row (ICCPR) down to Art. 22
invisible(Map(function(tt, yy) segments(x0=tt$conf.int[1], x1=tt$conf.int[2],
                                        y0=yy, y1=yy),
              list(t.iccpr, t.op1, t.cat, t.art22), c(4,3,2,1)))
## point estimates on top of the intervals, in the same order
invisible(Map(function(tt, yy) points(tt$estimate, y=yy, pch=21, bg="White"),
              list(t.iccpr, t.op1, t.cat, t.art22), c(4,3,2,1)))
dev.off()




#############################################
## Negative mentions decreasing?

##############################3
## ARt 22
nrow(art22)

## For every ratification in art22, compare the mean of pneg (proportion of
## negative words per mention) over the country's mentions in the 365 days up
## to the ratification date with the mean over the 365 days from it.
## Columns of mention.holder: country, mean before, mean after, after - before.
## NOTE(review): the ratification day itself falls in both windows.
mention.holder <- as.data.frame(matrix(NA,nrow(art22),4))
for(rr in seq_len(nrow(art22))){
  print(rr)
  myrow <- art22[rr,]
  countryname <- as.character(myrow[1])
  ## ratification date as a day number
  ratday <- as.numeric(myrow$date)
  ## rows of dat that mention this country
  is.country <- dat$name==countryname
  ## Look at the words before and after
  # BEFORE
  before <- pneg[is.country & dat$datenum %in% ((ratday-365):ratday)]
  # AFTER
  after <- pneg[is.country & dat$datenum %in% (ratday:(ratday+365))]
  mention.holder[rr,1] <- countryname
  mention.holder[rr,2] <- mean(before)
  mention.holder[rr,3] <- mean(after)
  mention.holder[rr,4] <- mean(after) - mean(before)
}
nmention.holder.art22 <- mention.holder


#######################
## Now for the CAT

## CAT ratification dates; no header row, so the columns are V1, V2, ...
## Read from the archive (relative path) instead of a machine-specific
## absolute path.
ratdat <- read.csv("text/rawdata/csv files with search names/CATratdates.csv", header=FALSE, as.is=TRUE)

## First item of business is to turn the dates in to a date column
mos <- c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec")
names(mos) <- c("01","02","03","04","05","06","07","08","09","10","11","12")
## V2 holds the date as day-Mon-yy; split every entry into its three pieces
dateparts <- strsplit(as.character(ratdat$V2),"-")
myday <- vapply(dateparts, function(p) p[1], character(1))
## month abbreviation -> two-digit month number
mymonth <- names(mos)[match(vapply(dateparts, function(p) p[2], character(1)), mos)]
myyr <- vapply(dateparts, function(p) p[3], character(1))
## two-digit years above 50 are read as 19xx, all others as 20xx
myyear <- paste(ifelse(as.numeric(myyr)>50,"19","20"),myyr,sep="")
dateholder <- paste(myyear,"-",mymonth,"-",myday,sep="")
ratdat$date <- as.Date(dateholder)
ratdat$datenum <- as.numeric(as.Date(dateholder))

## Mean of pneg (proportion of negative words per mention) over the country's
## mentions in the 365 days up to ratification versus the 365 days from it.
## Columns of mention.holder: country, mean before, mean after, after - before.
## NOTE(review): the ratification day itself falls in both windows.
mention.holder <- as.data.frame(matrix(NA,nrow(ratdat),4))
for(rr in seq_len(nrow(ratdat))){
  print(rr)
  myrow <- ratdat[rr,]
  countryname <- as.character(myrow[1])
  ## ratification date as a day number
  ratday <- as.numeric(myrow$date)
  ## rows of dat that mention this country
  is.country <- dat$name==countryname
  # BEFORE
  before <- pneg[is.country & dat$datenum %in% ((ratday-365):ratday)]
  # AFTER
  after <- pneg[is.country & dat$datenum %in% (ratday:(ratday+365))]
  mention.holder[rr,1] <- countryname
  mention.holder[rr,2] <- mean(before)
  mention.holder[rr,3] <- mean(after)
  mention.holder[rr,4] <- mean(after) - mean(before)
}
nmention.holder.cat <- mention.holder


#######################
## Now for the ICCPR

## ICCPR ratification dates; no header row, so the columns are V1, V2, ...
## Read from the archive (relative path) instead of a machine-specific
## absolute path.
ratdat <- read.csv("text/rawdata/csv files with search names/ICCPRratdates.csv", header=FALSE, as.is=TRUE)

## First item of business is to turn the dates in to a date column
mos <- c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec")
names(mos) <- c("01","02","03","04","05","06","07","08","09","10","11","12")
## V2 holds the date as day-Mon-yy; split every entry into its three pieces
dateparts <- strsplit(as.character(ratdat$V2),"-")
myday <- vapply(dateparts, function(p) p[1], character(1))
## month abbreviation -> two-digit month number
mymonth <- names(mos)[match(vapply(dateparts, function(p) p[2], character(1)), mos)]
myyr <- vapply(dateparts, function(p) p[3], character(1))
## two-digit years above 50 are read as 19xx, all others as 20xx
myyear <- paste(ifelse(as.numeric(myyr)>50,"19","20"),myyr,sep="")
dateholder <- paste(myyear,"-",mymonth,"-",myday,sep="")
ratdat$date <- as.Date(dateholder)
ratdat$datenum <- as.numeric(as.Date(dateholder))

## Mean of pneg (proportion of negative words per mention) over the country's
## mentions in the 365 days up to ratification versus the 365 days from it.
## This section feeds the negative-word analysis, so it must average pneg,
## not ppos.
## Columns of mention.holder: country, mean before, mean after, after - before.
## NOTE(review): the ratification day itself falls in both windows.
mention.holder <- as.data.frame(matrix(NA,nrow(ratdat),4))
for(rr in seq_len(nrow(ratdat))){
  print(rr)
  myrow <- ratdat[rr,]
  countryname <- as.character(myrow[1])
  ## ratification date as a day number
  ratday <- as.numeric(myrow$date)
  ## rows of dat that mention this country
  is.country <- dat$name==countryname
  # BEFORE
  before <- pneg[is.country & dat$datenum %in% ((ratday-365):ratday)]
  # AFTER
  after <- pneg[is.country & dat$datenum %in% (ratday:(ratday+365))]
  mention.holder[rr,1] <- countryname
  mention.holder[rr,2] <- mean(before)
  mention.holder[rr,3] <- mean(after)
  mention.holder[rr,4] <- mean(after) - mean(before)
}
nmention.holder.iccpr <- mention.holder


#######################
## Now for the OP1

## OP1 ratification dates; no header row, so the columns are V1, V2, ...
## Read with a relative path instead of a machine-specific absolute path.
## NOTE(review): assumes Op1ratdates.csv is archived next to the other
## ratification files -- confirm.
ratdat <- read.csv("text/rawdata/csv files with search names/Op1ratdates.csv", header=FALSE, as.is=TRUE)

## First item of business is to turn the dates in to a date column
mos <- c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec")
names(mos) <- c("01","02","03","04","05","06","07","08","09","10","11","12")


## V2 holds the date as day-Mon-yy; split every entry into its three pieces
dateparts <- strsplit(as.character(ratdat$V2),"-")
myday <- vapply(dateparts, function(p) p[1], character(1))
## month abbreviation -> two-digit month number
mymonth <- names(mos)[match(vapply(dateparts, function(p) p[2], character(1)), mos)]
myyr <- vapply(dateparts, function(p) p[3], character(1))
## two-digit years above 50 are read as 19xx, all others as 20xx
myyear <- paste(ifelse(as.numeric(myyr)>50,"19","20"),myyr,sep="")
dateholder <- paste(myyear,"-",mymonth,"-",myday,sep="")

ratdat$date <- as.Date(dateholder)
ratdat$datenum <- as.numeric(as.Date(dateholder))
 
head(ratdat) 

## Mean of pneg (proportion of negative words per mention) over the country's
## mentions in the 365 days up to ratification versus the 365 days from it.
## This section feeds the negative-word analysis, so it must average pneg,
## not ppos.
## Columns of mention.holder: country, mean before, mean after, after - before.
## NOTE(review): the ratification day itself falls in both windows.
mention.holder <- as.data.frame(matrix(NA,nrow(ratdat),4))
for(rr in seq_len(nrow(ratdat))){
  print(rr)
  myrow <- ratdat[rr,]
  countryname <- as.character(myrow[1])
  ## ratification date as a day number
  ratday <- as.numeric(myrow$date)
  ## rows of dat that mention this country
  is.country <- dat$name==countryname
  # BEFORE
  before <- pneg[is.country & dat$datenum %in% ((ratday-365):ratday)]
  # AFTER
  after <- pneg[is.country & dat$datenum %in% (ratday:(ratday+365))]
  mention.holder[rr,1] <- countryname
  mention.holder[rr,2] <- mean(before)
  mention.holder[rr,3] <- mean(after)
  mention.holder[rr,4] <- mean(after) - mean(before)
}

nmention.holder.op1 <- mention.holder


## checkpoint: write the whole workspace to disk
save.image("text/madedata/mention workspace3.RData")

# One-sample t-tests of the per-ratification change in the proportion of
# negative words (after minus before), one test per treaty.

t.test(nmention.holder.iccpr[,4])
t.test(nmention.holder.op1[,4])
t.test(nmention.holder.cat[,4])
t.test(nmention.holder.art22[,4])


## keep the test objects for plotting
t.iccpr <- t.test(nmention.holder.iccpr[,4])
t.op1 <- t.test(nmention.holder.op1[,4])
t.cat <- t.test(nmention.holder.cat[,4])
t.art22 <- t.test(nmention.holder.art22[,4])
## Start the plot
pdf("text/results/negativeWords.pdf")
## empty frame; one horizontal row per treaty (y = 1..4)
plot(0,0,type="n",ylim=c(.5,4.5),xlim=c(-.05,.05), axes=FALSE,
     xlab="Average Change in Proportion of Negative Words",
     ylab="", main="Negative Words Don't Decrease\nAfter Ratification")
axis(1)
axis(2,at=c(1,2,3,4),labels=c("Art. 22","CAT","Opt. 1","ICCPR"), las=2)
abline(v=0,lty=2,col="gray50")
## confidence intervals, drawn from the top row (ICCPR) down to Art. 22
invisible(Map(function(tt, yy) segments(x0=tt$conf.int[1], x1=tt$conf.int[2],
                                        y0=yy, y1=yy),
              list(t.iccpr, t.op1, t.cat, t.art22), c(4,3,2,1)))
## point estimates on top of the intervals, in the same order
invisible(Map(function(tt, yy) points(tt$estimate, y=yy, pch=21, bg="White"),
              list(t.iccpr, t.op1, t.cat, t.art22), c(4,3,2,1)))
dev.off()





#############################################
## So now, my goal is to line all of the positive mentions up
## so that ratification is time 0 and actually
## plot each mention around that



#############################
## CAT
## CAT ratification dates; no header row, so the columns are V1, V2, ...
## (forward slashes keep the path usable on every platform)
ratdat <- read.csv("text/rawdata/csv files with search names/CATratdates.csv", header=FALSE, as.is=TRUE)

head(ratdat)

## First item of business is to turn the dates in to a date column
mos <- c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec")
names(mos) <- c("01","02","03","04","05","06","07","08","09","10","11","12")
## V2 holds the date as day-Mon-yy; split every entry into its three pieces
dateparts <- strsplit(as.character(ratdat$V2),"-")
myday <- vapply(dateparts, function(p) p[1], character(1))
## month abbreviation -> two-digit month number
mymonth <- names(mos)[match(vapply(dateparts, function(p) p[2], character(1)), mos)]
myyr <- vapply(dateparts, function(p) p[3], character(1))
## two-digit years above 50 are read as 19xx, all others as 20xx
myyear <- paste(ifelse(as.numeric(myyr)>50,"19","20"),myyr,sep="")
dateholder <- paste(myyear,"-",mymonth,"-",myday,sep="")
ratdat$date <- as.Date(dateholder)
ratdat$datenum <- as.numeric(as.Date(dateholder))
 
head(ratdat) 

## Holders are allocated up front as lists.  Growing them from c() with [[<-
## gives an atomic vector when the first country has exactly one mention,
## after which longer entries can no longer be stored.
##   c.holder  country name per ratification
##   d.holder  days relative to ratification (0 = ratification day)
##   m.holder  ppos of the mentions within 365 days either side
c.holder <- character(nrow(ratdat))
d.holder <- vector("list", nrow(ratdat))
m.holder <- vector("list", nrow(ratdat))
for(rr in seq_len(nrow(ratdat))){
  print(rr)
  myrow <- ratdat[rr,]
  countryname <- as.character(myrow[1])
  ## ratification date as a day number
  ratday <- as.numeric(myrow$date)
  ## mentions of this country within 365 days either side of ratification
  in.window <- dat$name==countryname & dat$datenum %in% ((ratday-365):(ratday+365))

  mentions <- ppos[in.window]
  days <- dat$datenum[in.window] - ratday
  
  ## put them in lists
  c.holder[rr] <- countryname
  d.holder[[rr]] <- days
  m.holder[[rr]] <- mentions
}
  
c.holder
names(d.holder) <- c.holder
names(m.holder) <- c.holder
d.holder
m.holder

## all mentions pooled, with ratification at day 0, plus a kernel smoother
plot(unlist(d.holder),unlist(m.holder), ylim=c(0,1),xlim=c(-365,365),
     pch=20,col="#00000010")
abline(v=0,lty=2)
lines(ksmooth(unlist(d.holder),unlist(m.holder),"normal",bandwidth=10))

  ## save the holders for later
d.holder.cat <- d.holder
m.holder.cat <- m.holder



#############################
## Art 22
## For each row of the Art 22 ratification file, gather the ppos values and
## the day offsets of that country's mentions dated within 365 days either
## side of the ratification date.
## (forward slashes in the path so it also resolves outside Windows)
ratdat <- read.csv("text/rawdata/csv files with search names/Art22ratdates.csv",
                   header=FALSE, as.is=TRUE)
head(ratdat)

## First item of business is to turn the dates in to a date column.
## V2 is split on "-" into day, month abbreviation and two-digit year.
mos <- c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec")
names(mos) <- c("01","02","03","04","05","06","07","08","09","10","11","12")
dateparts <- strsplit(as.character(ratdat$V2),"-")
dayparts <- vapply(dateparts, function(p) p[1], character(1))
monparts <- vapply(dateparts, function(p) p[2], character(1))
yrparts <- vapply(dateparts, function(p) p[3], character(1))
## Two-digit years above 50 are read as 19xx, everything else as 20xx
century <- ifelse(as.numeric(yrparts)>50, "19", "20")
dateholder <- paste0(century, yrparts, "-",
                     names(mos)[match(monparts, mos)], "-", dayparts)
ratdat$date <- as.Date(dateholder)
ratdat$datenum <- as.numeric(ratdat$date)

head(ratdat)

## This loop scans every row of dat once per ratification, so it can take
## a while on the full data.
## The holders are preallocated as lists: when grown from c(), the result
## only became a list if the first country did not have exactly one mention.
m.holder <- vector("list", nrow(ratdat))
d.holder <- vector("list", nrow(ratdat))
c.holder <- character(nrow(ratdat))
for(rr in seq_len(nrow(ratdat))){
  print(rr)
  myrow <- ratdat[rr,]
  countryname <- as.character(myrow[1])
  ratday <- as.numeric(myrow$date)

  ## Rows of dat naming this country and dated in [ratday-365, ratday+365];
  ## built once and reused so that mentions and days always line up
  inwindow <- dat$name==countryname & dat$datenum %in% ((ratday-365):(ratday+365))
  mentions <- ppos[inwindow]
  ## day offset of each mention relative to the ratification date
  days <- dat$datenum[inwindow] - ratday

  ## put them in lists
  c.holder[[rr]] <- countryname
  d.holder[[rr]] <- days
  m.holder[[rr]] <- mentions
}
  
## Echo the country names collected by the loop
c.holder
## Label each holder entry with the country name stored at the same position
names(d.holder) <- c.holder
names(m.holder) <- c.holder
d.holder
m.holder

## Pool every ratification: the ppos value of each mention against its day
## offset from the ratification date (x limited to +/-365, y to [0,1]);
## points are drawn in nearly transparent black
plot(unlist(d.holder),unlist(m.holder), ylim=c(0,1),xlim=c(-365,365),
     pch=20,col="#00000010")
## dashed vertical line at day 0, the ratification date
abline(v=0,lty=2)
## kernel smooth of the pooled points (normal kernel, bandwidth 10)
lines(ksmooth(unlist(d.holder),unlist(m.holder),"normal",bandwidth=10))

  ## save the holders for later
d.holder.art22 <- d.holder
m.holder.art22 <- m.holder



#############################
## ICCPR
## For each row of the ICCPR ratification file, gather the ppos values and
## the day offsets of that country's mentions dated within 365 days either
## side of the ratification date.
## (forward slashes in the path so it also resolves outside Windows)
ratdat <- read.csv("text/rawdata/csv files with search names/ICCPRratdates.csv",
                   header=FALSE, as.is=TRUE)

head(ratdat)

## First item of business is to turn the dates in to a date column.
## V2 is split on "-" into day, month abbreviation and two-digit year.
mos <- c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec")
names(mos) <- c("01","02","03","04","05","06","07","08","09","10","11","12")
dateparts <- strsplit(as.character(ratdat$V2),"-")
dayparts <- vapply(dateparts, function(p) p[1], character(1))
monparts <- vapply(dateparts, function(p) p[2], character(1))
yrparts <- vapply(dateparts, function(p) p[3], character(1))
## Two-digit years above 50 are read as 19xx, everything else as 20xx
century <- ifelse(as.numeric(yrparts)>50, "19", "20")
dateholder <- paste0(century, yrparts, "-",
                     names(mos)[match(monparts, mos)], "-", dayparts)
ratdat$date <- as.Date(dateholder)
ratdat$datenum <- as.numeric(ratdat$date)

head(ratdat)

## This loop scans every row of dat once per ratification, so it can take
## a while on the full data.
## The holders are preallocated as lists: when grown from c(), the result
## only became a list if the first country did not have exactly one mention.
m.holder <- vector("list", nrow(ratdat))
d.holder <- vector("list", nrow(ratdat))
c.holder <- character(nrow(ratdat))
for(rr in seq_len(nrow(ratdat))){
  print(rr)
  myrow <- ratdat[rr,]
  countryname <- as.character(myrow[1])
  ratday <- as.numeric(myrow$date)

  ## Rows of dat naming this country and dated in [ratday-365, ratday+365];
  ## built once and reused so that mentions and days always line up
  inwindow <- dat$name==countryname & dat$datenum %in% ((ratday-365):(ratday+365))
  mentions <- ppos[inwindow]
  ## day offset of each mention relative to the ratification date
  days <- dat$datenum[inwindow] - ratday

  ## put them in lists
  c.holder[[rr]] <- countryname
  d.holder[[rr]] <- days
  m.holder[[rr]] <- mentions
}
  
## Echo the country names collected by the loop
c.holder
## Label each holder entry with the country name stored at the same position
names(d.holder) <- c.holder
names(m.holder) <- c.holder
d.holder
m.holder

## Pool every ratification: the ppos value of each mention against its day
## offset from the ratification date (x limited to +/-365, y to [0,1]);
## points are drawn in nearly transparent black
plot(unlist(d.holder),unlist(m.holder), ylim=c(0,1),xlim=c(-365,365),
     pch=20,col="#00000010")
## dashed vertical line at day 0, the ratification date
abline(v=0,lty=2)
## kernel smooth of the pooled points (normal kernel, bandwidth 10)
lines(ksmooth(unlist(d.holder),unlist(m.holder),"normal",bandwidth=10))

  ## save the holders for later
d.holder.iccpr <- d.holder
m.holder.iccpr <- m.holder



#############################
## OP1
## For each row of the OP1 ratification file, gather the ppos values and
## the day offsets of that country's mentions dated within 365 days either
## side of the ratification date.
## (forward slashes in the path so it also resolves outside Windows)
ratdat <- read.csv("text/rawdata/csv files with search names/Op1ratdates.csv",
                   header=FALSE, as.is=TRUE)

head(ratdat)

## First item of business is to turn the dates in to a date column.
## V2 is split on "-" into day, month abbreviation and two-digit year.
mos <- c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec")
names(mos) <- c("01","02","03","04","05","06","07","08","09","10","11","12")
dateparts <- strsplit(as.character(ratdat$V2),"-")
dayparts <- vapply(dateparts, function(p) p[1], character(1))
monparts <- vapply(dateparts, function(p) p[2], character(1))
yrparts <- vapply(dateparts, function(p) p[3], character(1))
## Two-digit years above 50 are read as 19xx, everything else as 20xx
century <- ifelse(as.numeric(yrparts)>50, "19", "20")
dateholder <- paste0(century, yrparts, "-",
                     names(mos)[match(monparts, mos)], "-", dayparts)

ratdat$date <- as.Date(dateholder)
ratdat$datenum <- as.numeric(ratdat$date)

head(ratdat)

## This loop scans every row of dat once per ratification, so it can take
## a while on the full data.
## The holders are preallocated as lists: when grown from c(), the result
## only became a list if the first country did not have exactly one mention.
m.holder <- vector("list", nrow(ratdat))
d.holder <- vector("list", nrow(ratdat))
c.holder <- character(nrow(ratdat))
for(rr in seq_len(nrow(ratdat))){
  print(rr)
  myrow <- ratdat[rr,]
  countryname <- as.character(myrow[1])
  ratday <- as.numeric(myrow$date)

  ## Rows of dat naming this country and dated in [ratday-365, ratday+365];
  ## built once and reused so that mentions and days always line up
  inwindow <- dat$name==countryname & dat$datenum %in% ((ratday-365):(ratday+365))
  mentions <- ppos[inwindow]
  ## day offset of each mention relative to the ratification date
  days <- dat$datenum[inwindow] - ratday

  ## put them in lists
  c.holder[[rr]] <- countryname
  d.holder[[rr]] <- days
  m.holder[[rr]] <- mentions
}
  
## Echo the country names collected by the loop
c.holder
## Label each holder entry with the country name stored at the same position
names(d.holder) <- c.holder
names(m.holder) <- c.holder
d.holder
m.holder

## Pool every ratification: the ppos value of each mention against its day
## offset from the ratification date (x limited to +/-365, y to [0,1]);
## points are drawn in nearly transparent black
plot(unlist(d.holder),unlist(m.holder), ylim=c(0,1),xlim=c(-365,365),
     pch=20,col="#00000010")
## dashed vertical line at day 0, the ratification date
abline(v=0,lty=2)
## kernel smooth of the pooled points (normal kernel, bandwidth 10)
lines(ksmooth(unlist(d.holder),unlist(m.holder),"normal",bandwidth=10))

  ## save the holders for later
d.holder.op1 <- d.holder
m.holder.op1 <- m.holder

## Beep once the script reaches this point
alarm()

## Snapshot the whole workspace to disk. The path uses forward slashes so it
## resolves on every platform (the backslash form only worked on Windows).
save.image("text/madedata/mention workspace4.RData")



## One row of four panels, in the order ICCPR, OP1, CAT, Art 22. Each panel
## pools all ratifications of that treaty and plots the saved m.holder values
## against the saved day offsets (x limited to +/-365, y to [0,1]), with a
## dashed line at day 0 and a red kernel smooth (normal kernel, bandwidth 10).
par(mfrow=c(1,4))
## ICCPR
plot(unlist(d.holder.iccpr),unlist(m.holder.iccpr), ylim=c(0,1),xlim=c(-365,365),
     pch=20,col="#00000010")
abline(v=0,lty=2)
lines(ksmooth(unlist(d.holder.iccpr),unlist(m.holder.iccpr),"normal",bandwidth=10),col="red")

## OP1
plot(unlist(d.holder.op1),unlist(m.holder.op1), ylim=c(0,1),xlim=c(-365,365),
     pch=20,col="#00000010")
abline(v=0,lty=2)
lines(ksmooth(unlist(d.holder.op1),unlist(m.holder.op1),"normal",bandwidth=10),col="red")

## CAT
plot(unlist(d.holder.cat),unlist(m.holder.cat), ylim=c(0,1),xlim=c(-365,365),
     pch=20,col="#00000010")
abline(v=0,lty=2)
lines(ksmooth(unlist(d.holder.cat),unlist(m.holder.cat),"normal",bandwidth=10),col="red")

## Art 22
plot(unlist(d.holder.art22),unlist(m.holder.art22), ylim=c(0,1),xlim=c(-365,365),
     pch=20,col="#00000010")
abline(v=0,lty=2)
lines(ksmooth(unlist(d.holder.art22),unlist(m.holder.art22),"normal",bandwidth=10),col="red")


## Now make a plot with the curves for each country
## (ICCPR only): an empty frame with the same axes as the pooled plot, then
## one translucent red kernel-smoothed curve per ratification.
par(mfrow=c(1,1))
plot(unlist(d.holder.iccpr),unlist(m.holder.iccpr), type="n", ylim=c(0,1),xlim=c(-365,365),
     pch=20,col="#00000010")
abline(v=0,lty=2)
## Walk the holders by position rather than by name: a name lookup returns
## the first match, so a country listed twice would be drawn twice from the
## same entry.
for(i in seq_along(d.holder.iccpr)){
  dd <- d.holder.iccpr[[i]]
  mm <- m.holder.iccpr[[i]]
  ## keep only complete (day, value) pairs so x and y stay aligned; dropping
  ## NAs from each vector separately could pair a day with the wrong value
  ok <- !is.na(dd) & !is.na(mm)
  if(any(ok)){
    lines(ksmooth(dd[ok],mm[ok],"normal",bandwidth=10),col="#FF000030")
  }
}

######################################################
######################################################
## Searching for the word "visit"


## 0/1 flag for every row of dat: 1 when the veryclose text contains "visit"
visit <- as.numeric(grepl("visit", dat$veryclose))

## show the matching text
dat$veryclose[visit==1]

visit
sum(visit)

## Draw a reproducible sample of 100 of the matching texts to read by hand
set.seed(1234)
visit.sample <- sample(as.character(dat$veryclose)[visit==1], size=100)
visit.sample

## code them as to whether they are a diplomatic visit or not
## Every entry is 1 except the positions zeroed below; the text after each
## zero is the hand-coding note that accompanied it.
## NOTE(review): the vector has 101 entries while visit.sample has 100 --
## confirm which entry is surplus.
is.v <- rep(1, 101)
is.v[13] <- 0  # is about tourists visiting (note sat one row above this entry -- confirm)
is.v[47] <- 0  # into line with european law language learning visits the european
is.v[55] <- 0  # aid for language learning visits imposes unjustified restrictions on patent
is.v[58] <- 0  # lithuania malta poland slovakia and for more information please visit
is.v[61] <- 0  # and lifestyle also attract visitors clonakilty district amenities of natural
is.v[98] <- 0  # persons visiting family or undergoing etc that is why it

## one-sample t-test on the 0/1 codes (its estimate is the share coded 1)
t.test(is.v)

## TAKE OUT THE VISITS THAT HAVE THE WORD PLEASE

## first, make sure they are websites
dat$veryclose[grepl("please visit", dat$veryclose)]

## It's the same, even if we don't use "please visit" together:
## flagged rows whose text contains "please" anywhere
dat$veryclose[visit==1 & grepl("please", dat$veryclose)]

## Reset the flag to zero wherever the text contains "please visit"
visit[grepl("please visit", dat$veryclose)] <- 0

visit
sum(visit)

## so then the correct proportion would be
## the same hand coding with the "please visit" entry (lithuania malta poland
## slovakia and for more information please visit) dropped, which leaves 100
## entries. Every entry is 1 except the positions zeroed below.
is.v <- rep(1, 100)
is.v[13] <- 0  # is about tourists visiting (note sat one row above this entry -- confirm)
is.v[47] <- 0  # into line with european law language learning visits the european
is.v[55] <- 0  # aid for language learning visits imposes unjustified restrictions on patent
is.v[60] <- 0  # and lifestyle also attract visitors clonakilty district amenities of natural
is.v[97] <- 0  # persons visiting family or undergoing etc that is why it

## one-sample t-test on the 0/1 codes (its estimate is the share coded 1)
t.test(is.v)


#############################
## Art 22
## For each row of the Art 22 ratification file, gather the "visit" flags and
## the day offsets of that country's mentions dated within 365 days either
## side of the ratification date.
## (forward slashes in the path so it also resolves outside Windows)
ratdat <- read.csv("text/rawdata/csv files with search names/Art22ratdates.csv",
                   header=FALSE, as.is=TRUE)
head(ratdat)

## First item of business is to turn the dates in to a date column.
## V2 is split on "-" into day, month abbreviation and two-digit year.
mos <- c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec")
names(mos) <- c("01","02","03","04","05","06","07","08","09","10","11","12")
dateparts <- strsplit(as.character(ratdat$V2),"-")
dayparts <- vapply(dateparts, function(p) p[1], character(1))
monparts <- vapply(dateparts, function(p) p[2], character(1))
yrparts <- vapply(dateparts, function(p) p[3], character(1))
## Two-digit years above 50 are read as 19xx, everything else as 20xx
century <- ifelse(as.numeric(yrparts)>50, "19", "20")
dateholder <- paste0(century, yrparts, "-",
                     names(mos)[match(monparts, mos)], "-", dayparts)

ratdat$date <- as.Date(dateholder)
ratdat$datenum <- as.numeric(ratdat$date)

## This loop scans every row of dat once per ratification, so it can take
## a while on the full data.
## The holders are preallocated as lists: when grown from c(), the result
## only became a list if the first country did not have exactly one mention.
v.holder <- vector("list", nrow(ratdat))
d.holder <- vector("list", nrow(ratdat))
c.holder <- character(nrow(ratdat))
for(rr in seq_len(nrow(ratdat))){
  print(rr)
  myrow <- ratdat[rr,]
  countryname <- as.character(myrow[1])
  ratday <- as.numeric(myrow$date)

  ## Rows of dat naming this country and dated in [ratday-365, ratday+365];
  ## built once and reused so that visits and days always line up
  inwindow <- dat$name==countryname & dat$datenum %in% ((ratday-365):(ratday+365))
  visits <- visit[inwindow]
  ## day offset of each mention relative to the ratification date
  days <- dat$datenum[inwindow] - ratday

  ## put them in lists
  c.holder[[rr]] <- countryname
  d.holder[[rr]] <- days
  v.holder[[rr]] <- visits
}

## Label each holder entry with its country name
names(d.holder) <- c.holder
names(v.holder) <- c.holder

## save the holders for later
## (d.holder.art22 is reassigned here from this pass over the same file)
d.holder.art22 <- d.holder
v.holder.art22 <- v.holder


#############################
## CAT
## For each row of the CAT ratification file, gather the "visit" flags and
## the day offsets of that country's mentions dated within 365 days either
## side of the ratification date.
## (forward slashes in the path so it also resolves outside Windows)
ratdat <- read.csv("text/rawdata/csv files with search names/CATratdates.csv",
                   header=FALSE, as.is=TRUE)

## First item of business is to turn the dates in to a date column.
## V2 is split on "-" into day, month abbreviation and two-digit year.
mos <- c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec")
names(mos) <- c("01","02","03","04","05","06","07","08","09","10","11","12")
dateparts <- strsplit(as.character(ratdat$V2),"-")
dayparts <- vapply(dateparts, function(p) p[1], character(1))
monparts <- vapply(dateparts, function(p) p[2], character(1))
yrparts <- vapply(dateparts, function(p) p[3], character(1))
## Two-digit years above 50 are read as 19xx, everything else as 20xx
century <- ifelse(as.numeric(yrparts)>50, "19", "20")
dateholder <- paste0(century, yrparts, "-",
                     names(mos)[match(monparts, mos)], "-", dayparts)

ratdat$date <- as.Date(dateholder)
ratdat$datenum <- as.numeric(ratdat$date)

## This loop scans every row of dat once per ratification, so it can take
## a while on the full data.
## The holders are preallocated as lists: when grown from c(), the result
## only became a list if the first country did not have exactly one mention.
v.holder <- vector("list", nrow(ratdat))
d.holder <- vector("list", nrow(ratdat))
c.holder <- character(nrow(ratdat))
for(rr in seq_len(nrow(ratdat))){
  print(rr)
  myrow <- ratdat[rr,]
  countryname <- as.character(myrow[1])
  ratday <- as.numeric(myrow$date)

  ## Rows of dat naming this country and dated in [ratday-365, ratday+365];
  ## built once and reused so that visits and days always line up
  inwindow <- dat$name==countryname & dat$datenum %in% ((ratday-365):(ratday+365))
  visits <- visit[inwindow]
  ## day offset of each mention relative to the ratification date
  days <- dat$datenum[inwindow] - ratday

  ## put them in lists
  c.holder[[rr]] <- countryname
  d.holder[[rr]] <- days
  v.holder[[rr]] <- visits
}

## Label each holder entry with its country name
names(d.holder) <- c.holder
names(v.holder) <- c.holder

## save the holders for later
## (d.holder.cat is reassigned here from this pass over the CAT file)
d.holder.cat <- d.holder
v.holder.cat <- v.holder


#############################
## ICCPR
## For each row of the ICCPR ratification file, gather the "visit" flags and
## the day offsets of that country's mentions dated within 365 days either
## side of the ratification date.
## (forward slashes in the path so it also resolves outside Windows)
ratdat <- read.csv("text/rawdata/csv files with search names/ICCPRratdates.csv",
                   header=FALSE, as.is=TRUE)

## First item of business is to turn the dates in to a date column.
## V2 is split on "-" into day, month abbreviation and two-digit year.
mos <- c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec")
names(mos) <- c("01","02","03","04","05","06","07","08","09","10","11","12")
dateparts <- strsplit(as.character(ratdat$V2),"-")
dayparts <- vapply(dateparts, function(p) p[1], character(1))
monparts <- vapply(dateparts, function(p) p[2], character(1))
yrparts <- vapply(dateparts, function(p) p[3], character(1))
## Two-digit years above 50 are read as 19xx, everything else as 20xx
century <- ifelse(as.numeric(yrparts)>50, "19", "20")
dateholder <- paste0(century, yrparts, "-",
                     names(mos)[match(monparts, mos)], "-", dayparts)

ratdat$date <- as.Date(dateholder)
ratdat$datenum <- as.numeric(ratdat$date)

## This loop scans every row of dat once per ratification, so it can take
## a while on the full data.
## The holders are preallocated as lists: when grown from c(), the result
## only became a list if the first country did not have exactly one mention.
v.holder <- vector("list", nrow(ratdat))
d.holder <- vector("list", nrow(ratdat))
c.holder <- character(nrow(ratdat))
for(rr in seq_len(nrow(ratdat))){
  print(rr)
  myrow <- ratdat[rr,]
  countryname <- as.character(myrow[1])
  ratday <- as.numeric(myrow$date)

  ## Rows of dat naming this country and dated in [ratday-365, ratday+365];
  ## built once and reused so that visits and days always line up
  inwindow <- dat$name==countryname & dat$datenum %in% ((ratday-365):(ratday+365))
  visits <- visit[inwindow]
  ## day offset of each mention relative to the ratification date
  days <- dat$datenum[inwindow] - ratday

  ## put them in lists
  c.holder[[rr]] <- countryname
  d.holder[[rr]] <- days
  v.holder[[rr]] <- visits
}

## Label each holder entry with its country name
names(d.holder) <- c.holder
names(v.holder) <- c.holder

## save the holders for later
## (d.holder.iccpr is reassigned here from this pass over the same file)
d.holder.iccpr <- d.holder
v.holder.iccpr <- v.holder


#############################
## OP1
## For each row of the OP1 ratification file, gather the "visit" flags and
## the day offsets of that country's mentions dated within 365 days either
## side of the ratification date.
## (forward slashes in the path so it also resolves outside Windows)
ratdat <- read.csv("text/rawdata/csv files with search names/Op1ratdates.csv",
                   header=FALSE, as.is=TRUE)

head(ratdat)

## First item of business is to turn the dates in to a date column.
## V2 is split on "-" into day, month abbreviation and two-digit year.
mos <- c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec")
names(mos) <- c("01","02","03","04","05","06","07","08","09","10","11","12")
dateparts <- strsplit(as.character(ratdat$V2),"-")
dayparts <- vapply(dateparts, function(p) p[1], character(1))
monparts <- vapply(dateparts, function(p) p[2], character(1))
yrparts <- vapply(dateparts, function(p) p[3], character(1))
## Two-digit years above 50 are read as 19xx, everything else as 20xx
century <- ifelse(as.numeric(yrparts)>50, "19", "20")
dateholder <- paste0(century, yrparts, "-",
                     names(mos)[match(monparts, mos)], "-", dayparts)

ratdat$date <- as.Date(dateholder)
ratdat$datenum <- as.numeric(ratdat$date)

## This loop scans every row of dat once per ratification, so it can take
## a while on the full data.
## The holders are preallocated as lists: when grown from c(), the result
## only became a list if the first country did not have exactly one mention.
v.holder <- vector("list", nrow(ratdat))
d.holder <- vector("list", nrow(ratdat))
c.holder <- character(nrow(ratdat))
for(rr in seq_len(nrow(ratdat))){
  print(rr)
  myrow <- ratdat[rr,]
  countryname <- as.character(myrow[1])
  ratday <- as.numeric(myrow$date)

  ## Rows of dat naming this country and dated in [ratday-365, ratday+365];
  ## built once and reused so that visits and days always line up
  inwindow <- dat$name==countryname & dat$datenum %in% ((ratday-365):(ratday+365))
  visits <- visit[inwindow]
  ## day offset of each mention relative to the ratification date
  days <- dat$datenum[inwindow] - ratday

  ## put them in lists
  c.holder[[rr]] <- countryname
  d.holder[[rr]] <- days
  v.holder[[rr]] <- visits
}

## Label each holder entry with its country name
names(d.holder) <- c.holder
names(v.holder) <- c.holder

## save the holders for later
## (d.holder.op1 is reassigned here from this pass over the same file)
d.holder.op1 <- d.holder
v.holder.op1 <- v.holder



## Snapshot the whole workspace to disk. The path uses forward slashes so it
## resolves on every platform (the backslash form only worked on Windows).
save.image("text/madedata/mention workspace5.RData")




## 2 x 2 grid of panels, in the order ICCPR, OP1, CAT, Art 22. Each panel
## pools all ratifications of that treaty and plots the 0/1 "visit" flag of
## every mention against its day offset from ratification (x limited to
## +/-365, y to [0,1]), with a dashed vertical line at day 0.
par(mfrow=c(2,2))
## ICCPR
plot(unlist(d.holder.iccpr),unlist(v.holder.iccpr), ylim=c(0,1),xlim=c(-365,365),
     pch=20,col="#00000010")
abline(v=0,lty=2)
## OP1
plot(unlist(d.holder.op1),unlist(v.holder.op1), ylim=c(0,1),xlim=c(-365,365),
     pch=20,col="#00000010")
abline(v=0,lty=2)
## CAT
plot(unlist(d.holder.cat),unlist(v.holder.cat), ylim=c(0,1),xlim=c(-365,365),
     pch=20,col="#00000010")
abline(v=0,lty=2)
## Art 22
plot(unlist(d.holder.art22),unlist(v.holder.art22), ylim=c(0,1),xlim=c(-365,365),
     pch=20,col="#00000010")
abline(v=0,lty=2)


## Test whether the word "visit" becomes more frequent after ratification
## of each of the treaties.

## For every ratification: the visit flags summed over mentions on or after
## the ratification day (offset >= 0) minus the sum over mentions before it
## (offset < 0). A one-sample t-test is then run on those differences.

## ICCPR
visit.diff <- vapply(seq_along(v.holder.iccpr), function(k) {
  flags <- v.holder.iccpr[[k]]
  offsets <- d.holder.iccpr[[k]]
  sum(flags[offsets>=0]) - sum(flags[offsets<0])
}, numeric(1))
t.test(visit.diff)

## OP1
visit.diff <- vapply(seq_along(v.holder.op1), function(k) {
  flags <- v.holder.op1[[k]]
  offsets <- d.holder.op1[[k]]
  sum(flags[offsets>=0]) - sum(flags[offsets<0])
}, numeric(1))
t.test(visit.diff)

## CAT
visit.diff <- vapply(seq_along(v.holder.cat), function(k) {
  flags <- v.holder.cat[[k]]
  offsets <- d.holder.cat[[k]]
  sum(flags[offsets>=0]) - sum(flags[offsets<0])
}, numeric(1))
t.test(visit.diff)

## ART22
visit.diff <- vapply(seq_along(v.holder.art22), function(k) {
  flags <- v.holder.art22[[k]]
  offsets <- d.holder.art22[[k]]
  sum(flags[offsets>=0]) - sum(flags[offsets<0])
}, numeric(1))
t.test(visit.diff)


####################################################
## Are there MORE mentions after ratification?
## For every ratification: the number of mentions on or after the
## ratification day (offset >= 0) minus the number before it (offset < 0).
## A one-sample t-test is then run on those differences, treaty by treaty.


## ICCPR
holder <- vapply(d.holder.iccpr, function(offsets) {
  length(offsets[offsets>=0]) - length(offsets[offsets<0])
}, integer(1), USE.NAMES=FALSE)
t.test(holder)

## OP1
holder <- vapply(d.holder.op1, function(offsets) {
  length(offsets[offsets>=0]) - length(offsets[offsets<0])
}, integer(1), USE.NAMES=FALSE)
t.test(holder)

## CAT
holder <- vapply(d.holder.cat, function(offsets) {
  length(offsets[offsets>=0]) - length(offsets[offsets<0])
}, integer(1), USE.NAMES=FALSE)
t.test(holder)

## ART22
holder <- vapply(d.holder.art22, function(offsets) {
  length(offsets[offsets>=0]) - length(offsets[offsets<0])
}, integer(1), USE.NAMES=FALSE)
t.test(holder)




##########################################################
##########################################################
##########################################################
##########################################################











